
* Session setup: turn off output paging so the script runs unattended,
* then wipe data, programs and stored results from memory.
set more off
clear all

* Project root; every input and output path below is built from $dir.
global dir "/Volumes/Zihao_SSD2/PatentsView"

*** ============================================================================================================
*** Table B13. Impose cutoff based on underlying cosine similarity
*** Cosine similarity >= x (x=0.75, 0.8, 0.85, 0.9)
*** Zihao Li. 11/2024
*** ============================================================================================================

* Load the citing-cited pair panel.
* asdouble forces decimal columns (sim_score) to be stored as doubles. The
* cutoffs used later (sim_score>=0.8, 0.85, 0.9) are compared against double
* literals; with float storage a value such as 0.9 is held as 0.89999998 and
* would fail sim_score>=0.9, silently dropping boundary pairs.
import delimited $dir/temp/omission_panel_cutoff.csv, clear asdouble
keep patent_id cited_patent_id omission sim_score
rename (patent_id cited_patent_id) (patent_id_i patent_id_j)

* Attach citing-patent (i) attributes; keep(match) retains only pairs found in
* both files and nogenerate skips creating _merge.
merge m:1 patent_id_i using $dir/temp/patent_i.dta, keep(match) nogenerate
keep omission allfemale_09_100_i leadfemale_09_100_i sim_score ///
    num_citations_i num_inventors_i avg_experience_i patent_year_i ///
    assignee_id_i main_cpc_section_i main_cpc_subclass_i assignee_country_i ///
    xi_real_i patent_id_i patent_id_j

* Attach cited-patent (j) attributes, again keeping matched pairs only.
merge m:1 patent_id_j using $dir/temp/patent_j.dta, keep(match) nogenerate

* Drop pairs whose similarity score is exactly 1.
drop if sim_score == 1

keep omission allfemale_09_100_i allfemale_09_100_j ///
    leadfemale_09_100_i leadfemale_09_100_j sim_score ///
    num_citations_i num_citations_j num_inventors_i num_inventors_j ///
    avg_experience_i avg_experience_j patent_year_i patent_year_j ///
    assignee_id_i assignee_id_j main_cpc_section_i main_cpc_section_j ///
    main_cpc_subclass_i main_cpc_subclass_j ///
    assignee_country_i assignee_country_j xi_real_i xi_real_j ///
    patent_id_i patent_id_j
sort patent_id_i patent_id_j


*** Generate regression variables
* Each indicator below is 1 when the condition holds and 0 otherwise
* (a logical expression in Stata evaluates to 1/0, never missing).

* same main_cpc_subclass
gen byte same_main_cpc = (main_cpc_subclass_i == main_cpc_subclass_j)

* same assignee_country (blank country on either side never counts as a match)
gen byte same_assignee_country = (assignee_country_i == assignee_country_j) ///
    & assignee_country_i != "" & assignee_country_j != ""

* same assignee (blank assignee id on either side never counts as a match;
* previously two patents with empty ids were flagged as sharing an assignee,
* unlike the country indicator above)
gen byte same_assignee = (assignee_id_i == assignee_id_j) ///
    & assignee_id_i != "" & assignee_id_j != ""

* years lag between citing patent and cited patent
gen years_lag = patent_year_i - patent_year_j

* KPSS commercial value (quality) variable: scale xi_real by 1,000,000
* (convert to dollar), then take the natural log.
* log() returns missing for non-positive values.
gen xi_dollar_real_i = xi_real_i * 1000000
gen xi_dollar_real_j = xi_real_j * 1000000
gen dollar_real_log_i = log(xi_dollar_real_i)
gen dollar_real_log_j = log(xi_dollar_real_j)

* Generate gender interaction variables (suffix _ji = product of a cited-patent
* indicator and a citing-patent indicator)
gen allfemale_09_100_ji = allfemale_09_100_i * allfemale_09_100_j
gen leadfemale_09_100_ji = leadfemale_09_100_i * leadfemale_09_100_j
* cited all-female x citing lead-female
gen allfemale_leadfemale_09_100_ji = allfemale_09_100_j * leadfemale_09_100_i
* cited lead-female x citing all-female
gen leadfemale_allfemale_09_100_ji = leadfemale_09_100_j * allfemale_09_100_i

* Numeric identifiers for the fixed effects absorbed in the regressions.
* Assignee ids use egen group() rather than encode: encode builds a value label
* and aborts once a variable has more than 65,536 distinct strings, while
* group() has no such cap. Both leave empty strings as missing and produce the
* same partition of observations.
egen long assignee_id_i_enc = group(assignee_id_i)
egen long assignee_id_j_enc = group(assignee_id_j)
* CPC sections keep encode, so the codes stay labelled with the section string.
encode main_cpc_section_i, gen(main_cpc_section_i_enc)
encode main_cpc_section_j, gen(main_cpc_section_j_enc)
* Drop pairs missing the citing year or either CPC section.
drop if patent_year_i==. | main_cpc_section_i=="" | main_cpc_section_j==""


*** Run regressions
* Eight columns = 4 similarity cutoffs (0.75, 0.80, 0.85, 0.90) x 2 specifications:
*   odd columns  : cited-patent all-female indicator only
*   even columns : adds the citing-patent indicator and the j x i interaction
* Every column absorbs citing/cited assignee, citing year and citing/cited CPC
* section fixed effects and clusters standard errors on the citing patent.
*
* The cutoff is applied explicitly in every column, including 0.75. Columns
* (1)-(2) were previously run without any restriction while still labelled
* "Cutoff, 75", which is only correct if the input file is already filtered.
*
* Uses local macros: run this block as a whole, not line by line.

local controls sim_score dollar_real_log_j num_citations_i num_inventors_i num_inventors_j avg_experience_i avg_experience_j same_main_cpc same_assignee_country years_lag
local gender1 allfemale_09_100_j
local gender2 allfemale_09_100_j allfemale_09_100_i allfemale_09_100_ji
local absorbed assignee_id_i_enc assignee_id_j_enc patent_year_i main_cpc_section_i_enc main_cpc_section_j_enc
local fe_notes Firm i FE, Yes, Firm j FE, Yes, Year i FE, Yes, Main CPC Section i FE, Yes, Main CPC Section j FE, Yes, Cluster SE, Citing, Sample, All

* The first column creates the table file; all later columns are appended.
local write_mode replace

foreach cut in 75 80 85 90 {
    forvalues g = 1/2 {
        * Regressors shown in the table: gender terms first, then controls.
        local rhs `gender`g'' `controls'

        * `cut' is the cutoff in hundredths, so 0.`cut' expands to 0.75, 0.80, ...
        reghdfe omission `rhs' if sim_score>=0.`cut', absorb(`absorbed') vce(cluster patent_id_i)

        * Store the dependent-variable mean in e(ymean) for the table footer.
        estadd ysumm
        outreg2 using $dir/reg_results/tableb13.doc, `write_mode' dec(4) keep(`rhs') addtext(`fe_notes', Cutoff, `cut') addstat(Mean of dependent variable, e(ymean))

        local write_mode append
    }
}

